1. HiC seurat downstream analysis

source("/media/maoni/data/CZP/test_data/ColorPalettes.R")
source("/media/maoni/data/CZP/test_data/R_functions.R")
set.seed(123)
work_path <- "/media/maoni/data/CZP/spatial_hic/E1305-hic-24"
sample <- "E1305-hic-24"
higashi_out <- "8_higashi_out_higashi_500kb"
work_path_seurat <- "9_seurat"
embed_type <- "scAB_scale"

setwd(work_path)
dir.create(work_path_seurat)
## Warning in dir.create(work_path_seurat): '9_seurat'已存在
setwd(paste0(work_path, "/", work_path_seurat))
dir.create(embed_type)
## Warning in dir.create(embed_type): 'scAB_scale'已存在
setwd(paste0(work_path,  "/", work_path_seurat, "/", embed_type))
dir.create("Img")
## Warning in dir.create("Img"): 'Img'已存在
dir.create("Matrix")
## Warning in dir.create("Matrix"): 'Matrix'已存在
dir.create("Out")
## Warning in dir.create("Out"): 'Out'已存在
dir.create("Plot")
## Warning in dir.create("Plot"): 'Plot'已存在
file.copy(from=paste0(work_path, "/5_bedpe/stat_clean.csv"), 
          to=paste0(work_path,  "/", work_path_seurat, "/", embed_type, "/Matrix/"), overwrite = TRUE, recursive = FALSE, copy.mode = TRUE)
## [1] TRUE
file.copy(from=paste0(work_path, "/6_crop/stat_cropped.csv"), 
          to=paste0(work_path,  "/", work_path_seurat, "/", embed_type, "/Matrix/"), overwrite = TRUE, recursive = FALSE, copy.mode = TRUE)
## [1] TRUE
file.copy(from=paste0(work_path, "/", higashi_out, "/", sample, "_", embed_type, ".csv"), 
          to=paste0(work_path,  "/", work_path_seurat, "/", embed_type, "/Matrix/"), overwrite = TRUE, recursive = FALSE, copy.mode = TRUE)
## [1] TRUE
# 1 Load data
setwd(paste0(work_path, "/", work_path_seurat, "/", embed_type))
stat_cropped <- read.table("Matrix/stat_cropped.csv", sep =",", header = TRUE, dec =".", stringsAsFactors = F)
iA_iB <- paste(51-stat_cropped$iA, stat_cropped$iB, sep="x")
ssAB_matrix <- read.table(paste0("Matrix/", sample, "_scAB_scale.csv"), sep =",", header = F, dec =".", stringsAsFactors = F)
rownames(ssAB_matrix) <- iA_iB
ssAB_matrix <- as.matrix(ssAB_matrix)
dim(ssAB_matrix)
## [1] 1999 4813
bin_spatial <- read.table(paste0(work_path ,"/", higashi_out, "/", sample, "_scAB_bin.csv"), sep =",", header = FALSE, dec =".", stringsAsFactors = F)
bin_spatial[,1] <-  gsub("b","",bin_spatial$V1)
bin_spatial_merge <- paste(bin_spatial$V1, paste(bin_spatial$V3, bin_spatial$V2, sep="-"), sep=":")
colnames(ssAB_matrix) <- bin_spatial_merge
dim(ssAB_matrix)
## [1] 1999 4813
# 2 create seurat object
assay = "Spatial"
slice = sample
image.dir = "/media/maoni/data/CZP/spatial_hic/seurat/core/50*50_blank_img"
image.nam = paste0(sample, "_fix.png")
coord.nam = "combine_barcode.round2round1_index1_index2.Seurat.txt"
Seurat_HiC = CreateSeuratObject(counts = t(ssAB_matrix), project = sample, assay = assay)
image <- readPNG(source = file.path(image.dir, image.nam))[,,1:3]
scale.factors <- c("tissue_hires_scalef"=1, "fiducial_diameter_fullres"=1, "tissue_lowres_scalef"=1)
tissue.positions <- read.table(file = file.path(image.dir,coord.nam), col.names = c("barcodes", "tissue", "row", "col", "imagerow", "imagecol"), header = FALSE, as.is = TRUE, row.names = 1)
spot.radius <- 0.015
image <- new(Class = "VisiumV1", image = image, scale.factors = scalefactors(spot = scale.factors[1], fiducial = scale.factors[2], hires = scale.factors[1], lowres = scale.factors[3]), coordinates = tissue.positions, spot.radius = spot.radius)
image <- image[Cells(Seurat_HiC)]
DefaultAssay(object = image) <- assay
Seurat_HiC[[slice]] <- image
# 3 Data quality
Seurat_HiC$valid_contact <- log10(stat_cropped$valid_contact)
Seurat_HiC$cis_more_10kb <- log10(stat_cropped$cis_more_10kb)
plot1 <- SpatialFeaturePlot(Seurat_HiC, features = "valid_contact",crop = F, pt.size.factor = 1.6, stroke = NA, min.cutoff = "q3") + theme(aspect.ratio = 1)
plot1$layers[[1]]$aes_params=c(plot1$layers[[1]]$aes_params, shape=22)
plot2 <- SpatialFeaturePlot(Seurat_HiC, features = "cis_more_10kb",crop = F, pt.size.factor = 1.6, stroke = NA, min.cutoff = "q3") + theme(aspect.ratio = 1) 
plot2$layers[[1]]$aes_params=c(plot2$layers[[1]]$aes_params, shape=22)
plot_grid(plot1, plot2)

# 4 Run PCA
Seurat_HiC <- SCTransform(Seurat_HiC, assay = "Spatial", verbose = T, variable.features.n = 2000)
## Calculating cell attributes from input UMI matrix: log_umi
## Variance stabilizing transformation of count matrix of size 4813 by 1999
## Model formula is y ~ log_umi
## Get Negative Binomial regression parameters per gene
## Using 2000 genes, 1999 cells
##   |                                                                                                                                                                          |                                                                                                                                                                  |   0%  |                                                                                                                                                                          |========================================                                                                                                                          |  25%  |                                                                                                                                                                          |=================================================================================                                                                                 |  50%  |                                                                                                                                                                          |==========================================================================================================================                                        |  75%  |                                                                                                                                                                          |==================================================================================================================================================================| 100%
## Found 43 outliers - those will be ignored in fitting/regularization step
## Second step: Get residuals using fitted parameters for 4813 genes
##   |                                                                                                                                                                          |                                                                                                                                                                  |   0%  |                                                                                                                                                                          |================                                                                                                                                                  |  10%  |                                                                                                                                                                          |================================                                                                                                                                  |  20%  |                                                                                                                                                                          |=================================================                                                                                                                 |  30%  |                                                                                                                                                                          |=================================================================                                                                                                 |  40%  |                                                                                                                                                                          |=================================================================================                                                                                 |  50%  |                                                                                                                                                                          |=================================================================================================                                                                 |  60%  |                                                                                                                                                                          |=================================================================================================================                                                 |  70%  |                                                                                                                                                                          |==================================================================================================================================                                |  80%  |                                                                                                                                                                          |==================================================================================================================================================                |  90%  |                                                                                                                                                                          |==================================================================================================================================================================| 100%
## Computing corrected count matrix for 4813 genes
##   |                                                                                                                                                                          |                                                                                                                                                                  |   0%  |                                                                                                                                                                          |================                                                                                                                                                  |  10%  |                                                                                                                                                                          |================================                                                                                                                                  |  20%  |                                                                                                                                                                          |=================================================                                                                                                                 |  30%  |                                                                                                                                                                          |=================================================================                                                                                                 |  40%  |                                                                                                                                                                          |=================================================================================                                                                                 |  50%  |                                                                                                                                                                          |=================================================================================================                                                                 |  60%  |                                                                                                                                                                          |=================================================================================================================                                                 |  70%  |                                                                                                                                                                          |==================================================================================================================================                                |  80%  |                                                                                                                                                                          |==================================================================================================================================================                |  90%  |                                                                                                                                                                          |==================================================================================================================================================================| 100%
## Calculating gene attributes
## Wall clock passed: Time difference of 12.44816 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Centering data matrix
## Set default assay to SCT
Seurat_HiC <- RunPCA(Seurat_HiC, assay = "SCT", verbose = FALSE)
# Select PC and res
PC=36
res=1
Seurat_HiC <- FindNeighbors(Seurat_HiC, reduction = "pca", dims = 1:PC)
## Computing nearest neighbor graph
## Computing SNN
Seurat_HiC <- FindClusters(Seurat_HiC, verbose = FALSE, resolution = res)
Seurat_HiC <- RunUMAP(Seurat_HiC, reduction = "pca", dims = 1:PC)
## 20:28:56 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:28:56 Read 1999 rows and found 36 numeric columns
## 20:28:56 Using Annoy for neighbor search, n_neighbors = 30
## 20:28:56 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:28:56 Writing NN index file to temp file /tmp/RtmpR2BkRs/filec60647df8208a
## 20:28:56 Searching Annoy index using 1 thread, search_k = 3000
## 20:28:57 Annoy recall = 100%
## 20:28:57 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 20:28:58 Initializing from normalized Laplacian + noise (using irlba)
## 20:28:58 Commencing optimization for 500 epochs, with 84430 positive edges
## 20:29:00 Optimization finished
umap_Spa <- list()
umap_Spa[["p1"]] <- DimPlot(Seurat_HiC, reduction = "umap", label = TRUE)
umap_Spa[["p2"]] <- SpatialDimPlot(Seurat_HiC, label = TRUE, label.size = 3, pt.size.factor = 1.6, stroke = NA)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
umap_Spa[["p2"]]$layers[[1]]$aes_params=c(umap_Spa[["p2"]]$layers[[1]]$aes_params, shape=22)
p <- ggarrange(plotlist = umap_Spa, ncol = 2, nrow = ceiling(length(umap_Spa)/2))
print(p)

anno <- rep(NA,ncol(Seurat_HiC))
anno[Seurat_HiC$seurat_clusters %in% c(0)] <- "H1"
anno[Seurat_HiC$seurat_clusters %in% c(1)] <- "H2"
anno[Seurat_HiC$seurat_clusters %in% c(2)] <- "H3"
anno[Seurat_HiC$seurat_clusters %in% c(3)] <- "H4"
anno[Seurat_HiC$seurat_clusters %in% c(4)] <- "H5"
anno[Seurat_HiC$seurat_clusters %in% c(5)] <- "H6"
anno[Seurat_HiC$seurat_clusters %in% c(6)] <- "H7"
anno[Seurat_HiC$seurat_clusters %in% c(7)] <- "H8"
anno[Seurat_HiC$seurat_clusters %in% c(8)] <- "H9"
Seurat_HiC$cell_type <- factor(anno,levels = paste0("H", 1:9))

my_color_palette <- brewer.pal(9, "Set1")
names(my_color_palette) <- levels(Seurat_HiC$cell_type)

umap_Spa <- list()
umap_Spa[["p1"]] <- DimPlot(Seurat_HiC, reduction = "umap", group.by = "cell_type", label = FALSE, pt.size = 0.5, cols = my_color_palette) + theme(aspect.ratio = 1)
umap_Spa[["p2"]] <- SpatialDimPlot(Seurat_HiC, group.by = "cell_type", label = FALSE, label.size = 3, pt.size.factor = 1.6, stroke = NA, cols = my_color_palette) + theme(aspect.ratio = 1)
umap_Spa[["p2"]]$layers[[1]]$aes_params=c(umap_Spa[["p2"]]$layers[[1]]$aes_params, shape=22)

p <- ggarrange(plotlist = umap_Spa, ncol = 2, nrow = ceiling(length(umap_Spa)/2))
print(p)

2. RNA seurat downstream analysis

set.seed(123)
work_path <- "/media/maoni/data/CZP/spatial_transcriptome/E13-23-RNA"
sample <- "E13-23-RNA"
setwd(work_path)
dir.create("6_seurat")
## Warning in dir.create("6_seurat"): '6_seurat'已存在
setwd(paste0(work_path, "/6_seurat"))
dir.create("Img")
## Warning in dir.create("Img"): 'Img'已存在
dir.create("Matrix")
## Warning in dir.create("Matrix"): 'Matrix'已存在
dir.create("Out")
## Warning in dir.create("Out"): 'Out'已存在
dir.create("Plot")
## Warning in dir.create("Plot"): 'Plot'已存在
file.copy(from=paste0(work_path, "/3_umi_tools/", sample, ".debarcoded_passed_reads_stat.csv"), 
          to=paste0(work_path, "/6_seurat/Matrix/"), overwrite = TRUE, recursive = FALSE, copy.mode = TRUE)
## [1] TRUE
file.copy(from=paste0(work_path, "/4_zUMIs/", sample, ".debarcoded_passed_reads_stat_cropped.csv"), 
          to=paste0(work_path, "/6_seurat/Matrix/"), overwrite = TRUE, recursive = FALSE, copy.mode = TRUE)
## [1] TRUE
file.copy(from=paste0(work_path, "/4_zUMIs/", sample, ".dgecounts.rds"), 
          to=paste0(work_path, "/6_seurat/Matrix/"), overwrite = TRUE, recursive = FALSE, copy.mode = TRUE)
## [1] TRUE
# prepare gene infor file
# perl -lane 'if(/gene_id\s.(.+?).\;.+?gene_name\s.(.+?).\;.+?gene_biotype\s.(.+?).\;/){print "$1\t$2\t$3"}' Mus_musculus.GRCm38.102.chr.gtf | sort -u > gGRCm38.102.gene.ids.names
gene.names <- read.table("/media/maoni/data/Reference/mouse/GRCm38_mm10/ensembl/gGRCm38.102.gene.ids.names", sep = "\t")

# remove unnecessary pseudogene, rRNA, snoRNA, tRNA, ribozyme
removed.genes1 <- gene.names$V2[grep("pseudogene|tRNA|rRNA|snoRNA|ribozyme|snRNA|scaRNA|misc_RNA|scRNA|sRNA", gene.names$V3)]
removed.genes2 <- gene.names$V2[grep("mt-|Rps|Rpl|Hba|Hbb", gene.names$V2)]  # 1829
removed.genes <- unique(c(removed.genes1, removed.genes2))  # 38300

allCounts <- readRDS(paste0("Matrix/", sample, ".dgecounts.rds"))
count_matrix <- allCounts$umicount$inex$all
count_matrix <- as.matrix(count_matrix)

raw_stat = read.table(paste0("Matrix/", sample, ".debarcoded_passed_reads_stat.csv"),sep =",", header = TRUE, dec =".", stringsAsFactors = F)
raw_stat_cell = paste(51-raw_stat$iA, raw_stat$iB, sep="x")
raw_stat_barcode = paste0(raw_stat$bc_B, raw_stat$bc_A)
colnames(count_matrix) <- raw_stat_cell[match(colnames(count_matrix),raw_stat_barcode)]
rownames(count_matrix) <- gene.names$V2[match(rownames(count_matrix), gene.names[,1])]

ID <- as.character(rownames(count_matrix))
d <- duplicated(ID)
ID <- factor(ID, levels = unique(ID))
count_matrix <- rowsum(as.matrix(count_matrix), ID, reorder = FALSE, na.rm = TRUE)
removed.gene <- intersect(rownames(count_matrix), removed.genes)
count_matrix <- count_matrix[-match(removed.gene, rownames(count_matrix)),]
dim(count_matrix)
## [1] 29905  2500
# 1 prepare cropped matrix
stat_cropped = read.table(paste0("Matrix/", sample, ".debarcoded_passed_reads_stat_cropped.csv"),sep =",", header = TRUE, dec =".", stringsAsFactors = F)
iA_iB <- paste(51-stat_cropped$iA, stat_cropped$iB, sep="x")
count_matrix_cropped <- count_matrix[, match(iA_iB,colnames(count_matrix))]
dim(count_matrix_cropped)
## [1] 29905  2045
# 2 create seurat object
assay = "Spatial"
slice = sample
image.dir = "/media/maoni/data/CZP/spatial_transcriptome/seurat/core/50*50_blank_img" # "./Img"
image.nam = paste0(sample,"_fix_1080.png") # "grey_pixel_1080p.png"
coord.nam = "combine_barcode.round2round1_index1_index2.Seurat.txt"
Seurat_RNA = CreateSeuratObject(counts = count_matrix_cropped, project = sample,assay = assay, min.cells = 3, min.features = 0) # min.cells min.features
image <- readPNG(source = file.path(image.dir, image.nam))[,,1:3]
## Warning in readPNG(source = file.path(image.dir, image.nam)): libpng warning: iCCP: known incorrect sRGB profile
scale.factors <- c("tissue_hires_scalef"=1, "fiducial_diameter_fullres"=1, "tissue_lowres_scalef"=1)
tissue.positions <- read.table(file = file.path(image.dir,coord.nam), col.names = c("barcodes", "tissue", "row", "col", "imagerow", "imagecol"), header = FALSE, as.is = TRUE, row.names = 1)
spot.radius <- 0.015 # estiamte:(0.13)*50/410/2
image <- new(Class = "VisiumV1", image = image, scale.factors = scalefactors(spot = scale.factors[1], fiducial = scale.factors[2], hires = scale.factors[1], lowres = scale.factors[3]), coordinates = tissue.positions, spot.radius = spot.radius)
image <- image[Cells(Seurat_RNA)]
DefaultAssay(object = image) <- assay
Seurat_RNA[[slice]] <- image
# 3 Data quality
plot1 <- VlnPlot(Seurat_RNA, features = c("nCount_Spatial", "nFeature_Spatial"), ncol = 3, pt.size = 0)
plot1

plot2 <- FeatureScatter(Seurat_RNA, feature1 = "nCount_Spatial", feature2 = "nFeature_Spatial")
plot2

plot3 <- SpatialFeaturePlot(Seurat_RNA, features = "nCount_Spatial", pt.size.factor = 1.6, stroke = NA) + theme(legend.position = "top")
plot3$layers[[1]]$aes_params=c(plot3$layers[[1]]$aes_params, shape=22)
plot4 <- SpatialFeaturePlot(Seurat_RNA, features = "nFeature_Spatial", pt.size.factor = 1.6, stroke = NA) + theme(legend.position = "top")
plot4$layers[[1]]$aes_params=c(plot4$layers[[1]]$aes_params, shape=22)
plot3 + plot4

# 4 Run PCA
Seurat_RNA <- SCTransform(Seurat_RNA, assay = "Spatial", verbose = FALSE)
Seurat_RNA <- RunPCA(Seurat_RNA, assay = "SCT", verbose = FALSE)
# Select PC and res
PC=30
res=0.8
Seurat_RNA <- FindNeighbors(Seurat_RNA, reduction = "pca", dims = 1:PC)
## Computing nearest neighbor graph
## Computing SNN
Seurat_RNA <- FindClusters(Seurat_RNA, verbose = FALSE, resolution = res)
Seurat_RNA <- RunUMAP(Seurat_RNA, reduction = "pca", dims = 1:PC)
## 20:29:32 UMAP embedding parameters a = 0.9922 b = 1.112
## 20:29:32 Read 2045 rows and found 30 numeric columns
## 20:29:32 Using Annoy for neighbor search, n_neighbors = 30
## 20:29:32 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 20:29:32 Writing NN index file to temp file /tmp/RtmpR2BkRs/filec60644ec9d08e
## 20:29:32 Searching Annoy index using 1 thread, search_k = 3000
## 20:29:32 Annoy recall = 100%
## 20:29:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 20:29:33 Initializing from normalized Laplacian + noise (using irlba)
## 20:29:33 Commencing optimization for 500 epochs, with 74804 positive edges
## 20:29:35 Optimization finished
umap_Spa <- list()
umap_Spa[["p1"]] <- DimPlot(Seurat_RNA, reduction = "umap", label = TRUE)
umap_Spa[["p2"]] <- SpatialDimPlot(Seurat_RNA, label = FALSE, label.size = 3, pt.size.factor = 1.6, stroke = NA)
umap_Spa[["p2"]]$layers[[1]]$aes_params=c(umap_Spa[["p2"]]$layers[[1]]$aes_params, shape=22)
p <- ggarrange(plotlist = umap_Spa, ncol = 2, nrow = ceiling(length(umap_Spa)/2))
print(p)

# 5 Aannotation
anno <- rep(NA,ncol(Seurat_RNA))
anno[Seurat_RNA$seurat_clusters %in% c(0)] <- "R3"
anno[Seurat_RNA$seurat_clusters %in% c(1)] <- "R9"
anno[Seurat_RNA$seurat_clusters %in% c(2)] <- "R2"
anno[Seurat_RNA$seurat_clusters %in% c(3)] <- "R10"
anno[Seurat_RNA$seurat_clusters %in% c(4)] <- "R8"
anno[Seurat_RNA$seurat_clusters %in% c(5,15)] <- "R11"
anno[Seurat_RNA$seurat_clusters %in% c(6,14)] <- "R12"
anno[Seurat_RNA$seurat_clusters %in% c(7,10)] <- "R13"
anno[Seurat_RNA$seurat_clusters %in% c(8)] <- "R7"
anno[Seurat_RNA$seurat_clusters %in% c(9)] <- "R5"
anno[Seurat_RNA$seurat_clusters %in% c(11)] <- "R14"
anno[Seurat_RNA$seurat_clusters %in% c(12)] <- "R16"
anno[Seurat_RNA$seurat_clusters %in% c(13)] <- "R4"
anno[Seurat_RNA$seurat_clusters %in% c(16)] <- "R1"
anno[Seurat_RNA$seurat_clusters %in% c(17)] <- "R6"
anno[Seurat_RNA$seurat_clusters %in% c(18)] <- "R15"

Seurat_RNA$cell_type <- factor(anno,levels = paste0("R", 1:16))

# set colors for each cluster
my_color_palette <- ArchRPalettes$bear
names(my_color_palette) <- paste0("R", 1:16)

umap_Spa <- list()
umap_Spa[["p1"]] <- DimPlot(Seurat_RNA, reduction = "umap", group.by = "cell_type", label = FALSE, pt.size = 0.5, cols = my_color_palette) + theme(aspect.ratio = 1)
umap_Spa[["p2"]] <- SpatialDimPlot(Seurat_RNA, group.by = "cell_type", label = FALSE, label.size = 3, pt.size.factor = 1.6, stroke = NA, cols = my_color_palette) + theme(aspect.ratio = 1)
umap_Spa[["p2"]]$layers[[1]]$aes_params=c(umap_Spa[["p2"]]$layers[[1]]$aes_params, shape=22)

p <- ggarrange(plotlist = umap_Spa, ncol = 2, nrow = ceiling(length(umap_Spa)/2))
print(p)

# 6 DE
Idents(Seurat_RNA) <- "cell_type"
de_markers <- FindAllMarkers(Seurat_RNA, only.pos = T, min.pct = 0.1, logfc.threshold = 0.25)  #8434
## Calculating cluster R1
## Calculating cluster R2
## Calculating cluster R3
## Calculating cluster R4
## Calculating cluster R5
## Calculating cluster R6
## Calculating cluster R7
## Calculating cluster R8
## Calculating cluster R9
## Calculating cluster R10
## Calculating cluster R11
## Calculating cluster R12
## Calculating cluster R13
## Calculating cluster R14
## Calculating cluster R15
## Calculating cluster R16
top50 <- de_markers %>% group_by(cluster) %>% top_n(n = 50, wt = avg_log2FC)  %>% arrange(cluster, desc(avg_log2FC))
head(de_markers)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj cluster    gene
## Nxph4   6.321021e-37   1.144369 0.701 0.150 1.539485e-32      R1   Nxph4
## Thsd7a  4.531952e-34   1.673563 1.000 0.873 1.103757e-29      R1  Thsd7a
## Dner    1.820668e-33   1.390474 1.000 0.823 4.434237e-29      R1    Dner
## Ina     2.838141e-31   1.479849 1.000 0.821 6.912293e-27      R1     Ina
## Ebf1    1.378339e-30   1.751752 1.000 0.952 3.356944e-26      R1    Ebf1
## Neurod2 1.427857e-30   1.048038 0.597 0.128 3.477546e-26      R1 Neurod2

3. Integrated HiC and RNA

gene_infor_500k <- read.csv("/media/maoni/data/Reference/mouse/GRCm38_mm10/ensembl/GRCm38_100_gtf_infor_hic_500kb_reference")
head(gene_infor_500k)
##    X  chr   start     end            gene_id     gene_name            gene_type          gene_region
## 1  1 chr1 3000000 3500000 ENSMUSG00000102693 4933401J01Rik                  TEC chr1:3000000-3500000
## 2  2 chr1 3000000 3500000 ENSMUSG00000064842       Gm26206                snRNA chr1:3000000-3500000
## 3  3 chr1 3000000 3500000 ENSMUSG00000051951          Xkr4       protein_coding chr1:3000000-3500000
## 4  4 chr1 3500000 4000000 ENSMUSG00000051951          Xkr4       protein_coding chr1:3500000-4000000
## 5 41 chr1 3000000 3500000 ENSMUSG00000102851       Gm18956 processed_pseudogene chr1:3000000-3500000
## 6  5 chr1 3000000 3500000 ENSMUSG00000103377       Gm37180                  TEC chr1:3000000-3500000
options(scipen = 0)
Idents(Seurat_RNA) <- "cell_type"
Idents(Seurat_HiC) <- "cell_type"
dim(Seurat_RNA)
## [1] 24355  2045
dim(Seurat_HiC)
## [1] 4813 1999
marker_genes <- list()
count = 1
for(i in unique(de_markers$cluster)){
  marker_genes[[count]] <- de_markers$gene[which(de_markers$cluster == i)][1:50]
  count = count + 1
}
names(marker_genes) <- unique(de_markers$cluster)

DefaultAssay(Seurat_HiC) <- "Spatial"
for(i in names(marker_genes)){
  marker_genes[[i]] <- intersect(rownames(Seurat_HiC), unique(gene_infor_500k$gene_region[which(gene_infor_500k$gene_name %in% marker_genes[[i]])]))
  Seurat_HiC@meta.data[,i] <- colMeans(as.matrix(GetAssayData(Seurat_HiC, assay = "Spatial", slot = "data")[marker_genes[[i]],]))
}

scale_rows_0_1 <- function(df) {
  t(apply(df, 1, function(x) {
    (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
  })) %>% as.data.frame()
}

tmp_meta <- as.matrix(Seurat_HiC@meta.data[,levels(Seurat_RNA)])
rownames(tmp_meta) <- paste0(Seurat_HiC$cell_type)
tmp_meta <- limma::avereps(tmp_meta)
tmp_meta0 <- apply(tmp_meta, 2, function(x){scale(x)})

tmp_meta0 <- scale_rows_0_1(tmp_meta0)
rownames(tmp_meta0) <- rownames(tmp_meta)
tmp_meta0 <- tmp_meta0[levels(Seurat_HiC$cell_type),]

tmp_meta0
##            R1         R2         R3         R4         R5        R6        R7         R8         R9        R10        R11         R12       R13        R14        R15
## H1 0.19200503 0.21441181 0.24580224 0.18611535 0.28906078 0.0000000 0.4147610 0.59247182 0.98491396 0.83152167 1.00000000 0.514765178 0.6697356 0.70790729 0.77857542
## H2 0.93463943 0.91180035 0.95465617 1.00000000 0.19518589 0.5189426 0.1839964 0.10548906 0.23890219 0.09257173 0.15762798 0.062759069 0.0000000 0.55037882 0.03517193
## H3 0.18473007 0.13414849 0.12992744 0.07326413 0.16968987 0.0000000 0.3234831 0.41885274 0.84450170 0.87709174 0.77900078 0.377746200 0.5756097 1.00000000 0.73272578
## H4 1.00000000 0.97051765 0.94351908 0.90212818 0.58345241 0.5724062 0.2264139 0.11515164 0.20028151 0.19923066 0.19366199 0.189222109 0.1074409 0.33221180 0.13113711
## H5 0.31997151 0.36765268 0.39298110 0.39800364 0.04865987 0.0000000 0.3050282 0.59289895 0.98237705 1.00000000 0.98443938 0.484341168 0.6061849 0.13262992 0.75395178
## H6 0.34001685 0.40100129 0.34785359 0.28580902 1.00000000 0.8130741 0.1111577 0.06266884 0.06577101 0.13475616 0.12911012 0.008717921 0.1424550 0.42303580 0.00000000
## H7 0.11925385 0.01188567 0.04394948 0.21105090 0.41390040 0.6772816 0.9201446 1.00000000 0.00000000 0.29544932 0.05761302 0.925521751 0.6712854 0.13247505 0.48866154
## H8 0.01187082 0.01044891 0.00000000 0.02773607 0.51255269 0.4752327 1.0000000 0.54910493 0.43951397 0.43627716 0.55380247 0.662465853 0.8371760 0.04464643 0.76505066
## H9 0.25083535 0.50158586 0.41697950 0.32132576 0.61810875 0.5665755 0.3794388 0.58388640 0.21642373 0.01151493 0.13332405 1.000000000 0.4654085 0.00000000 0.37539944
##           R16
## H1 0.77529814
## H2 0.16328000
## H3 0.73433358
## H4 0.00000000
## H5 0.69259863
## H6 0.05342795
## H7 0.25172475
## H8 0.89397808
## H9 0.58668366
mat <- as.matrix(tmp_meta0)
pc <- prcomp(mat, scale. = TRUE)
row_order <- order(pc$x[, 1])
col_order <- order(pc$rotation[, 1])

whitePurple = c("9"='#f7fcfd',"6"='#e0ecf4',"8"='#bfd3e6',"5"='#9ebcda',"2"='#8c96c6',"4"='#8c6bb1',"7"='#88419d',"3"='#810f7c',"1"='#4d004b')
p <- pheatmap::pheatmap(
  mat[row_order, col_order],
  cellwidth = 20,
  cellheight = 20,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(whitePurple)(100),
  filename = NA
)